easypackages::packages ("sf", "sp", "spdep", "spatialreg", "GWmodel", "tmap", "mapview", "car", "RColorBrewer", 
                        "cowplot", "leafsync", "leaflet.extras2", "mapview", "tidyverse", "patchwork")
## Loading required package: sf
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
## Loading required package: sp
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: spatialreg
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
## Loading required package: GWmodel
## Loading required package: robustbase
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
## Loading required package: tmap
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
## Loading required package: mapview
## Loading required package: car
## Loading required package: carData
## Loading required package: RColorBrewer
## Loading required package: cowplot
## Loading required package: leafsync
## Loading required package: leaflet.extras2
## Loading required package: leaflet
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand()    masks Matrix::expand()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ tidyr::pack()      masks Matrix::pack()
## ✖ dplyr::recode()    masks car::recode()
## ✖ purrr::some()      masks car::some()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack()    masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: patchwork
## 
## 
## Attaching package: 'patchwork'
## 
## 
## The following object is masked from 'package:cowplot':
## 
##     align_plots
## 
## 
## All packages loaded successfully
project_directory <- dirname(getwd())

Loading Data

Toronto Map

toronto_geojson_file <- file.path(project_directory, "data", "geodata", "neighbourhoods.geojson")
toronto <- st_read(toronto_geojson_file)
## Reading layer `neighbourhoods' from data source 
##   `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/neighbourhoods.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 140 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -79.63926 ymin: 43.581 xmax: -79.11527 ymax: 43.85546
## Geodetic CRS:  WGS 84
# the AREA_NAME is the name of the neighbourhood
# we need to clean the text so it is the same as all the other files
toronto$AREA_NAME <- tolower(trimws(gsub("\\(\\d+\\)", "", toronto$AREA_NAME)))
toronto$AREA_NAME <- gsub("\\(\\d+\\)", "", toronto$AREA_NAME)
toronto$AREA_NAME <- gsub("[[:punct:]]", "", toronto$AREA_NAME)

names(toronto)[names(toronto) == "AREA_NAME"] <- "neighbourhood"


toronto$neighbourhood[which(toronto$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
toronto$neighbourhood[which(toronto$neighbourhood == 'north stjames town')] <- 'north st james town'

toronto <- toronto[order(toronto$neighbourhood), ]

toronto$neighbourhood
##   [1] "agincourt north"                   "agincourt southmalvern west"      
##   [3] "alderwood"                         "annex"                            
##   [5] "banburydon mills"                  "bathurst manor"                   
##   [7] "bay street corridor"               "bayview village"                  
##   [9] "bayview woodssteeles"              "bedford parknortown"              
##  [11] "beechboroughgreenbrook"            "bendale"                          
##  [13] "birchcliffecliffside"              "black creek"                      
##  [15] "blakejones"                        "briar hillbelgravia"              
##  [17] "bridle pathsunnybrookyork mills"   "broadview north"                  
##  [19] "brookhavenamesbury"                "cabbagetownsouth st james town"   
##  [21] "caledoniafairbank"                 "casa loma"                        
##  [23] "centennial scarborough"            "churchyonge corridor"             
##  [25] "clairleabirchmount"                "clanton park"                     
##  [27] "cliffcrest"                        "corso italiadavenport"            
##  [29] "danforth"                          "danforth east york"               
##  [31] "don valley village"                "dorset park"                      
##  [33] "dovercourtwallace emersonjunction" "downsviewrodingcfb"               
##  [35] "dufferin grove"                    "east enddanforth"                 
##  [37] "edenbridgehumber valley"           "eglinton east"                    
##  [39] "elmsold rexdale"                   "englemountlawrence"               
##  [41] "eringatecentennialwest deane"      "etobicoke west mall"              
##  [43] "flemingdon park"                   "forest hill north"                
##  [45] "forest hill south"                 "glenfieldjane heights"            
##  [47] "greenwoodcoxwell"                  "guildwood"                        
##  [49] "henry farm"                        "high park north"                  
##  [51] "high parkswansea"                  "highland creek"                   
##  [53] "hillcrest village"                 "humber heightswestmount"          
##  [55] "humber summit"                     "humbermede"                       
##  [57] "humewoodcedarvale"                 "ionview"                          
##  [59] "islingtoncity centre west"         "junction area"                    
##  [61] "keelesdaleeglinton west"           "kennedy park"                     
##  [63] "kensingtonchinatown"               "kingsview villagethe westway"     
##  [65] "kingsway south"                    "lambton baby point"               
##  [67] "lamoreaux"                         "lansingwestgate"                  
##  [69] "lawrence park north"               "lawrence park south"              
##  [71] "leasidebennington"                 "little portugal"                  
##  [73] "long branch"                       "malvern"                          
##  [75] "maple leaf"                        "markland wood"                    
##  [77] "milliken"                          "mimico includes humber bay shores"
##  [79] "morningside"                       "moss park"                        
##  [81] "mount dennis"                      "mount olivesilverstonejamestown"  
##  [83] "mount pleasant east"               "mount pleasant west"              
##  [85] "new toronto"                       "newtonbrook east"                 
##  [87] "newtonbrook west"                  "niagara"                          
##  [89] "north riverdale"                   "north st james town"              
##  [91] "oakridge"                          "oakwood village"                  
##  [93] "oconnorparkview"                   "old east york"                    
##  [95] "palmerstonlittle italy"            "parkwoodsdonalda"                 
##  [97] "pelmo parkhumberlea"               "playter estatesdanforth"          
##  [99] "pleasant view"                     "princessrosethorn"                
## [101] "regent park"                       "rexdalekipling"                   
## [103] "rockcliffesmythe"                  "roncesvalles"                     
## [105] "rosedalemoore park"                "rouge"                            
## [107] "runnymedebloor west village"       "rustic"                           
## [109] "scarborough village"               "south parkdale"                   
## [111] "south riverdale"                   "standrewwindfields"               
## [113] "steeles"                           "stonegatequeensway"               
## [115] "tam oshantersullivan"              "taylormassey"                     
## [117] "the beaches"                       "thistletownbeaumond heights"      
## [119] "thorncliffe park"                  "trinitybellwoods"                 
## [121] "university"                        "victoria village"                 
## [123] "waterfront communitiesthe island"  "west hill"                        
## [125] "west humberclairville"             "westminsterbranson"               
## [127] "weston"                            "westonpelham park"                
## [129] "wexfordmaryvale"                   "willowdale east"                  
## [131] "willowdale west"                   "willowridgemartingroverichview"   
## [133] "woburn"                            "woodbine corridor"                
## [135] "woodbinelumsden"                   "wychwood"                         
## [137] "yongeeglinton"                     "yongestclair"                     
## [139] "york university heights"           "yorkdaleglen park"
toronto_map <- ggplot() +
  geom_sf(data = toronto, fill = "lightblue") +
  theme_void()

toronto_map

Census 2016

The census files contain 63 variables that will be used as indicators.

#census2011_path <- file.path(project_directory, "data", "census2011.csv")
#census2011 <- read.csv(census2011_path, row.names = 1)

census2016_path <- file.path(project_directory, "data", "census2016.csv")
census2016 <- read.csv(census2016_path, row.names = 1)
census2016 <- census2016[order(census2016$neighbourhood), ]

#census2021_path <- file.path(project_directory, "data", "census2021.csv")
#census2021 <- read.csv(census2021_path, row.names = 1)
names(census2016)
##  [1] "neighbourhood"                 "single_detached_house"        
##  [3] "apart_5_plus"                  "other_dwelling"               
##  [5] "avg_household_size"            "married"                      
##  [7] "not_married"                   "single_parents"               
##  [9] "one_person_household"          "two_plus_person_household"    
## [11] "low_income_percent"            "non_immigrants"               
## [13] "immigrants"                    "owner"                        
## [15] "renter"                        "own_room"                     
## [17] "sharing_room"                  "suitable_housing"             
## [19] "not_suitable_housing"          "total_work_activity"          
## [21] "unemployed"                    "employed"                     
## [23] "commute_drives"                "commute_passenger"            
## [25] "commute_public_transport"      "commute_walk"                 
## [27] "commute_cycle"                 "commute_other"                
## [29] "pop_0_to_4"                    "pop_5_to_9"                   
## [31] "pop_10_to_14"                  "pop_15_to_19"                 
## [33] "pop_20_to_24"                  "pop_25_to_29"                 
## [35] "pop_30_to_34"                  "pop_35_to_39"                 
## [37] "pop_40_to_44"                  "pop_45_to_49"                 
## [39] "pop_50_to_54"                  "pop_55_to_59"                 
## [41] "pop_60_to_64"                  "pop_65_to_69"                 
## [43] "pop_70_to_74"                  "pop_75_to_79"                 
## [45] "pop_80_to_84"                  "pop_85_to_89"                 
## [47] "pop_90_to_94"                  "pop_95_to_99"                 
## [49] "pop_100_plus"                  "household_income"             
## [51] "income_under_5000"             "income_between_5000_9999"     
## [53] "income_between_10000_14999"    "income_between_50000_59999"   
## [55] "income_above_100k"             "income_between_15000_19999"   
## [57] "income_between_20000_29999"    "income_between_30000_39999"   
## [59] "income_between_40000_49999"    "income_between_60000_79999"   
## [61] "income_between_80000_99999"    "no_certificate_diploma_degree"
## [63] "highschool_diploma"            "post_secondary_diploma"       
## [65] "total_population"              "num_crimes"

Add Population Density

toronto$area <- st_area(toronto)
#census2011$population_density <- census2011$total_population/toronto$area
census2016$population_density <- census2016$total_population/toronto$area
#census2021$population_density <- census2021$total_population/toronto$area

Add Toronto Geometries to Census 2016

census2016_with_geom <- merge(census2016, toronto, by.x = "neighbourhood", by.y = "neighbourhood", all.x = TRUE)
census2016_sf <- st_as_sf(census2016_with_geom)
st_crs(census2016_sf) <- st_crs(toronto)

Add Parks

parks_geojson_file <- file.path(project_directory, "data", "geodata", "parks.geojson")
parks <- st_read(parks_geojson_file)
## Reading layer `parks' from data source 
##   `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/parks.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1702 features and 82 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -79.62752 ymin: 43.581 xmax: -79.1242 ymax: 43.84753
## Geodetic CRS:  WGS 84
names(parks)[names(parks) == "AREA_NAME"] <- "neighbourhood"
parks <- parks[order(parks$neighbourhood), ]
parks$neighbourhood <- tolower(parks$neighbourhood)
parks$neighbourhood <- gsub("[[:punct:]0-9]", "", parks$neighbourhood)
parks <- parks %>%
  group_by(neighbourhood)

# Calculate the area of each park
parks <- parks %>%
  mutate(park_area = st_area(geometry))

# Sum the area per neighbourhood
parks <- parks %>%
  group_by(neighbourhood) %>%
  summarise(total_park_area = sum(park_area))

# Add the neighbourhood area
parks <- parks %>%
  mutate(neighbourhood_area = toronto$area)

# Calculate how much of the neighbourhood is a park
parks <- parks %>%
  mutate(proportion = total_park_area/neighbourhood_area)
parks <- parks[order(parks$neighbourhood), ]
census2016_sf <- census2016_sf %>%
  mutate(proportion_park_area = parks$proportion)

census2016_sf$proportion_park_area <- as.numeric(census2016_sf$proportion_park_area)

Add Libraries

libraries_geojson_file <- file.path(project_directory, "data", "geodata", "libraries.geojson")
libraries<- st_read(libraries_geojson_file)
## Reading layer `libraries' from data source 
##   `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/libraries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 108 features and 63 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -79.61925 ymin: 43.59905 xmax: -79.14005 ymax: 43.82411
## Geodetic CRS:  WGS 84
names(libraries)[names(libraries) == "AREA_NAME"] <- "neighbourhood"
libraries <- libraries[order(libraries$neighbourhood), ]
# data cleaning
libraries$neighbourhood <- tolower(libraries$neighbourhood)
libraries$neighbourhood <- gsub("[[:punct:]0-9]", "", libraries$neighbourhood)
libraries$neighbourhood <- trimws(libraries$neighbourhood)

libraries$neighbourhood[which(libraries$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
libraries$neighbourhood[which(libraries$neighbourhood == 'north stjames town')] <- 'north st james town'
# group by neighbourhood to sum up number of libraries in each
libraries <- libraries %>%
  group_by(neighbourhood) %>%
  summarise(num_libraries = n())
# some neighbourhoods have no libraries, but we still need them in the data
missing_neighborhoods <- setdiff(toronto$neighbourhood, libraries$neighbourhood)
length(missing_neighborhoods)
## [1] 77
toronto_missing <- toronto[toronto$neighbourhood %in% missing_neighborhoods, ]
toronto_missing <- subset(toronto, neighbourhood %in% missing_neighborhoods, select = c("neighbourhood", "geometry"))
toronto_missing$num_libraries <- 0
# add the missing neighbourhoods
libraries <- rbind(libraries, toronto_missing)
libraries <- libraries[order(libraries$neighbourhood), ]
census2016_sf <- census2016_sf %>%
  mutate(num_libraries = libraries$num_libraries)

Add Police Facilities

police_geojson_file <- file.path(project_directory, "data", "geodata", "distance_to_police.geojson")
police <- st_read(police_geojson_file)
## Reading layer `distance_to_police' from data source 
##   `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/distance_to_police.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 140 features and 13 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -79.59636 ymin: 43.59236 xmax: -79.15084 ymax: 43.8212
## Geodetic CRS:  WGS 84
names(police)[names(police) == "AREA_NAME"] <- "neighbourhood"
police <- police[order(police$neighbourhood), ]
police$neighbourhood <- tolower(police$neighbourhood)
police$neighbourhood <- gsub("[[:punct:]0-9]", "", police$neighbourhood)
police$neighbourhood <- trimws(police$neighbourhood)
police <- police[order(police$neighbourhood), ]
census2016_sf <- census2016_sf %>%
  mutate(closest_police_facility = police$HubDist)

This shows the closest police facility for every neighbourhood (measured in km).

# Overlay libraries
police_plot <- toronto_map +
  geom_sf(data = police, color = "yellow", size = 1) +
  theme_void()

# Print the map
print(police_plot)

Add Subway Stops

subway_geojson_file <- file.path(project_directory, "data", "geodata", "subway_stops.geojson")
subway <- st_read(subway_geojson_file)
## Reading layer `subway_stops' from data source 
##   `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/subway_stops.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 614 features and 37 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -79.53769 ymin: 43.63569 xmax: -79.26372 ymax: 43.78225
## Geodetic CRS:  WGS 84
names(subway)[names(subway) == "AREA_NAME"] <- "neighbourhood"
subway$neighbourhood <- tolower(subway$neighbourhood)
subway$neighbourhood <- gsub("[[:punct:]0-9]", "", subway$neighbourhood)
subway$neighbourhood <- trimws(subway$neighbourhood)
subway$neighbourhood[which(subway$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
subway$neighbourhood[which(subway$neighbourhood == 'north stjames town')] <- 'north st james town'
# group by neighbourhood to sum up number of stops in each
subway <- subway %>%
  group_by(neighbourhood) %>%
  summarise(num_subway_stops = n())
# some neighbourhoods have no stops, but we still need them in the data
missing_neighborhoods2 <- setdiff(toronto$neighbourhood, subway$neighbourhood)

toronto_missing2 <- toronto[toronto$neighbourhood %in% missing_neighborhoods2, ]
toronto_missing2 <- subset(toronto, neighbourhood %in% missing_neighborhoods2, select = c("neighbourhood", "geometry"))
toronto_missing2$num_subway_stops <- 0
# add the missing neighbourhoods
subway <- rbind(subway, toronto_missing2)
subway <- subway[order(subway$neighbourhood), ]
census2016_sf <- census2016_sf %>%
  mutate(num_subway_stops = subway$num_subway_stops)

Add Red Light Cameras

cam_geojson_file <- file.path(project_directory, "data", "geodata", "cam2016.csv")
cameras <- read.csv(cam_geojson_file)
cameras <- cameras[order(cameras$neighbourhood), ]
cameras$neighbourhood[which(cameras$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
cameras$neighbourhood[which(cameras$neighbourhood == 'north stjames town')] <- 'north st james town'
# add geometries
subset_toronto <- toronto %>%
  filter(neighbourhood %in% cameras$neighbourhood)

cameras <- cameras %>%
  mutate(geometry = subset_toronto$geometry)
# some neighbourhoods have no stops, but we still need them in the data
missing_neighborhoods3 <- setdiff(toronto$neighbourhood, cameras$neighbourhood)

toronto_missing3 <- toronto[toronto$neighbourhood %in% missing_neighborhoods3, ]
toronto_missing3 <- subset(toronto, neighbourhood %in% missing_neighborhoods3, select = c("neighbourhood"))
toronto_missing3$num_cameras <- 0
cameras <- cameras[order(cameras$neighbourhood), ]
cameras <- rbind(cameras, toronto_missing3)
census2016_sf <- census2016_sf %>%
  mutate(num_cameras = cameras$num_cameras)

Exploratory Spatial Data Analysis

# change fill to see different maps
ggplot() +
  geom_sf(data = census2016_sf, aes(fill = num_crimes)) +
  scale_fill_gradient(name = "Number of crimes", low = "grey", high = "red") +
  theme_minimal()

Spatial Autocorrelation of Dependent Variable

#Continuity
crimedata_nbq <- poly2nb(census2016_sf, queen=TRUE) 
crimedata_nbq_w <- nb2listw(crimedata_nbq, style="W", zero.policy = TRUE) #Queen’s 

coordsW <- census2016_sf%>%
  st_centroid()%>%
  st_geometry()
## Warning: st_centroid assumes attributes are constant over geometries
#KNN
#crimedata_KNN <- knearneigh(coordsW, k=4) 
#crimedata_nbq_KNN <- knn2nb(crimedata_KNN, sym=T)
#crimedata_KNN_w <- nb2listw(crimedata_nbq_KNN, style="W", zero.policy = TRUE)

plot(crimedata_nbq, st_geometry(coordsW), col="red")

mc_global <- moran.mc(census2016_sf$num_crimes, crimedata_nbq_w, 2999, alternative="greater") 

#plot the  Moran’s I
plot(mc_global)

mc_global
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  census2016_sf$num_crimes 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.34892, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater

Functions for analysis

Linear Regression

#calculation
linear_regression_analysis <- function(equation, data) {
    linearMod <- lm(equation, data = data)
    summary_info <- summary(linearMod)
    mc_global_OLS <- moran.mc(linearMod$residuals, crimedata_nbq_w, 2999, zero.policy = TRUE, alternative = "greater")
    vif_values <- vif(linearMod)
    
    # Save results
    linear_results <- list(model = linearMod, summary = summary_info, vif = vif_values, moran=mc_global_OLS)
    return(linear_results)
}
plot_residuals <- function(linear_results){
  qtm(residuals(linear_results$linearMod))
}

LISA

# calculation
lisa_analysis <- function(data) {
    gm_crime_LISA <- localmoran(data$num_crimes, crimedata_nbq_w)
    summary_info <- summary(gm_crime_LISA)
    
    # Save results
    lisa_results <- list(gm_crime_LISA = gm_crime_LISA, summary = summary_info)
    return(lisa_results)
}
# visualisation
visualise_LISA <- function(data, gm_crime_LISA) {
    sf_object <- data 
    
    sf_object$gm_crime_LISA <- gm_crime_LISA[, 1]
    p_values <- gm_crime_LISA[, 5]

    map_LISA <- tm_shape(sf_object) + 
        tm_polygons(col = "gm_crime_LISA", title = "Local Moran’s I", midpoint = 0,
                    palette = "RdYlBu", breaks = c(-1, -0.5, 0, 0.5, 1, 2))
    
    map_LISA_p <- tm_shape(sf_object) + 
        tm_polygons(col = "gm_crime_LISA", title = "p-values",
                    breaks = c(0, 0.01, 0.05, 1), palette = "Reds")

    tmap_arrange(map_LISA, map_LISA_p)
}
# crime clusters 
visualize_LISA_clusters <- function(data_sf, LISA_data, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan')) {
  # Calculate mean value of the variable under consideration
  meanVal <- mean(data_sf$num_crimes)
  
  # Create a copy of LISA data to preserve original results
  LISA_type <- LISA_data
  
  # Create a tibble object storing the LISA values into different classes of significance
  LISA_mapping <- LISA_type %>%
    tibble::as_tibble() %>%
    magrittr::set_colnames(c("Ii", "E.Ii", "Var.Ii", "Z.Ii", "Pr(z > 0)")) %>%
    dplyr::mutate(coType = dplyr::case_when(
      `Pr(z > 0)` > significanceLevel ~ "Insignificant", # Greater than significance level, thus Insignificant
      `Pr(z > 0)` <= significanceLevel & Ii >= 0 & data_sf$num_crimes >= meanVal ~ "HH", # High-High local Moran's I values
      `Pr(z > 0)` <= significanceLevel & Ii >= 0 & data_sf$num_crimes < meanVal ~ "LL", # Low-Low local Moran's I values
      `Pr(z > 0)` <= significanceLevel & Ii < 0 & data_sf$num_crimes >= meanVal ~ "HL", # High-Low local Moran's I values
      `Pr(z > 0)` <= significanceLevel & Ii < 0 & data_sf$num_crimes < meanVal ~ "LH" # Low-High local Moran's I values
    ))
  
  # Join back to the main polygon data
  data_sf$coType <- LISA_mapping$coType %>% tidyr::replace_na("Insignificant")
  
  # Map the clusters
  ggplot(data_sf) +
    geom_sf(aes(fill = coType)) +
    scale_fill_manual(values = colors, name = 'Clusters & \nOutliers') +
    labs(title = "Crime clusters") +
    theme(panel.background = element_rect(fill = 'transparent'))
}

GWR

# calculation
gwr_analysis <- function(equation, data, i) {
    crimedata2016_sp <- as_Spatial(data)
    print(paste("---Equation", i, "---"))
    abw <- bw.gwr(equation, approach = "AIC", adaptive = TRUE, kernel = "gaussian", data = crimedata2016_sp)
    
    a.gwr <- gwr.basic(equation, adaptive = TRUE, kernel = "gaussian", bw = abw, data = crimedata2016_sp)
    
    # Save results
    gwr_results <- list(a.gwr = a.gwr)
    
    return(gwr_results)
}
# visualisation
extract_predictors <- function(equation) {
    predictors <- attr(terms(equation), "term.labels")
    return(predictors)
}

visualise_gwr <- function(agwr_sf, equation) {
    predictors <- extract_predictors(equation)
    
    maps <- lapply(predictors, function(predictor) {
        qtm_map <- qtm(agwr_sf, predictor)
        return(qtm_map)
    })
    
    tmap_arrange(maps, asp = 5, ncol = 1)
}

Processing

Equations

# only social indicators
eq1 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_walk
eq2 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_public_transport
eq3 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_public_transport + single_parents

# only physical indicators - change these to include Asha's physical indicators
eq4 <- num_crimes ~ num_cameras + proportion_park_area + closest_police_facility
eq5 <- num_crimes ~ population_density + proportion_park_area + closest_police_facility + num_subway_stops
eq6 <- num_crimes ~ proportion_park_area + closest_police_facility + num_cameras

# both social and physical indicators
eq7 <- num_crimes ~ unemployed +  + commute_public_transport + population_density + proportion_park_area
eq8 <-  num_crimes ~ household_income + no_certificate_diploma_degree + num_cameras + population_density
eq9 <- num_crimes ~ low_income_percent + population_density + num_subway_stops + no_certificate_diploma_degree
eq10 <- num_crimes ~ proportion_park_area + unemployed + population_density + no_certificate_diploma_degree + household_income


# List of equations
equations <- list(eq1, eq2, eq3, eq4, eq5, eq6, eq7, eq8, eq9, eq10)

Calculations

linear_results_list <- list()
lisa_results_list <- list()
gwr_results_list <- list()

# Perform analysis for each equation
for (i in seq_along(equations)) {
    eq <- equations[[i]]
    
    # Linear Regression Analysis
    linear_results_list[[i]] <- linear_regression_analysis(eq, census2016_sf)
    
    # LISA Analysis
    lisa_results_list[[i]] <- lisa_analysis(census2016_sf)
    
    # GWR Analysis
    gwr_results_list[[i]] <- gwr_analysis(eq, census2016_sf, i)
}
## [1] "---Equation 1 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1760.241 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1756.92 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1753.423 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1750.522 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1748.303 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1747.261 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1746.78 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1746.737 
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1747.365 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1746.459 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1746.459 
## [1] "---Equation 2 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1783.573 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1780.546 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1776.387 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1772.109 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1767.095 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1764.025 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1762.681 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1762.711 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1763.131 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1762.553 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1762.553 
## [1] "---Equation 3 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1781.222 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1775.519 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1767.925 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1761.35 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1756.199 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1753.178 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1752.493 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1753.059 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1752.79 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1752.547 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1752.493 
## [1] "---Equation 4 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1904.455 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1903.733 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1900.686 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1897.09 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1894.362 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1892.383 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1891.444 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1891.096 
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1891.498 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047 
## [1] "---Equation 5 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1857.355 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1856.905 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1855.55 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1854.73 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1853.745 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1853.508 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1853.501 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1853.679 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1853.7 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1853.185 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1853.185 
## [1] "---Equation 6 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1904.455 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1903.733 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1900.686 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1897.09 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1894.362 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1892.383 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1891.444 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1891.096 
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1891.498 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047 
## [1] "---Equation 7 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1824.117 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1819.372 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1813.101 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1806.292 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1799.875 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1794.906 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1791.349 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1788.752 
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1787.138 
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1784.808 
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1784.808 
## [1] "---Equation 8 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1784.154 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1781.083 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1777.658 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1775.712 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1773.202 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1771.712 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1770.57 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1770.53 
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1770.348 
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1769.09 
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1769.09 
## [1] "---Equation 9 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1840.426 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1839.856 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1838.918 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1838.712 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1838.271 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1838.123 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1838.247 
## Adaptive bandwidth (number of nearest neighbours): 27 AICc value: 1838.167 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1838.087 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1838.087 
## [1] "---Equation 10 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1779.979 
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1776.633 
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1772.516 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1768.819 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1765.395 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1763.111 
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1762.292 
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1762.3 
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1762.521 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1761.99 
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1761.99

Linear Regression Plots

# equation 1
linear_results_list[[1]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)                     unemployed  
##                     15.435032                      -0.002092  
##              household_income  no_certificate_diploma_degree  
##                      0.012563                       0.037000  
##                  commute_walk  
##                      0.057229  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -433.27  -55.99  -16.01   25.81  717.82 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   15.435032  25.374382   0.608 0.544016    
## unemployed                    -0.002092   0.009019  -0.232 0.816917    
## household_income               0.012563   0.006286   1.999 0.047651 *  
## no_certificate_diploma_degree  0.037000   0.010403   3.557 0.000519 ***
## commute_walk                   0.057229   0.010883   5.259 5.54e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 129.5 on 135 degrees of freedom
## Multiple R-squared:  0.6571, Adjusted R-squared:  0.6469 
## F-statistic: 64.67 on 4 and 135 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##                    unemployed              household_income 
##                      6.880903                      7.525150 
## no_certificate_diploma_degree                  commute_walk 
##                      3.171722                      3.880383 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.076061, observed rank = 2874, p-value = 0.042
## alternative hypothesis: greater
# equation 2
linear_results_list[[2]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)                     unemployed  
##                     -21.52553                       -0.02597  
##              household_income  no_certificate_diploma_degree  
##                       0.05018                        0.04327  
##      commute_public_transport  
##                      -0.03021  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -309.39  -59.17  -22.17   27.73  743.16 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -21.525534  26.117903  -0.824 0.411298    
## unemployed                     -0.025968   0.008486  -3.060 0.002670 ** 
## household_income                0.050176   0.006080   8.253 1.24e-13 ***
## no_certificate_diploma_degree   0.043272   0.011463   3.775 0.000239 ***
## commute_public_transport       -0.030208   0.014509  -2.082 0.039234 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 140 on 135 degrees of freedom
## Multiple R-squared:  0.5997, Adjusted R-squared:  0.5878 
## F-statistic: 50.56 on 4 and 135 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##                    unemployed              household_income 
##                      5.218472                      6.030366 
## no_certificate_diploma_degree      commute_public_transport 
##                      3.298805                      5.143706 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.18061, observed rank = 2997, p-value = 0.001
## alternative hypothesis: greater
# equation 3
linear_results_list[[3]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)                     unemployed  
##                     -22.27344                       -0.02150  
##              household_income  no_certificate_diploma_degree  
##                       0.04995                        0.05335  
##      commute_public_transport                 single_parents  
##                      -0.02757                       -0.05376  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -303.72  -67.72  -18.41   28.95  739.01 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -22.273444  26.145779  -0.852  0.39579    
## unemployed                     -0.021502   0.009786  -2.197  0.02972 *  
## household_income                0.049952   0.006088   8.205 1.68e-13 ***
## no_certificate_diploma_degree   0.053353   0.015877   3.360  0.00101 ** 
## commute_public_transport       -0.027570   0.014799  -1.863  0.06466 .  
## single_parents                 -0.053759   0.058546  -0.918  0.36015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 140 on 134 degrees of freedom
## Multiple R-squared:  0.6022, Adjusted R-squared:  0.5873 
## F-statistic: 40.57 on 5 and 134 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##                    unemployed              household_income 
##                      6.930897                      6.040073 
## no_certificate_diploma_degree      commute_public_transport 
##                      6.321437                      5.345046 
##                single_parents 
##                     10.593496 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.19135, observed rank = 2996, p-value = 0.001333
## alternative hypothesis: greater
# equation 4
linear_results_list[[4]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##             (Intercept)              num_cameras     proportion_park_area  
##                 367.832                   -4.988                 -256.239  
## closest_police_facility  
##                 -39.488  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -232.04 -127.14  -53.54   40.88 1040.75 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              367.832     42.919   8.570 2.01e-14 ***
## num_cameras               -4.988     12.918  -0.386  0.70000    
## proportion_park_area    -256.239    215.370  -1.190  0.23621    
## closest_police_facility  -39.488     14.355  -2.751  0.00676 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 212.4 on 136 degrees of freedom
## Multiple R-squared:  0.07146,    Adjusted R-squared:  0.05098 
## F-statistic: 3.489 on 3 and 136 DF,  p-value: 0.01757
## 
## 
## $vif
##             num_cameras    proportion_park_area closest_police_facility 
##                1.012933                1.037584                1.028877 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.33258, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 5
linear_results_list[[5]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##             (Intercept)       population_density     proportion_park_area  
##                 198.752                    9.584                  -14.275  
## closest_police_facility         num_subway_stops  
##                 -14.284                    1.812  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -550.21  -87.32  -36.26   56.68  922.38 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              198.752     40.865   4.864 3.16e-06 ***
## population_density         9.584      1.334   7.182 4.18e-11 ***
## proportion_park_area     -14.275    185.389  -0.077    0.939    
## closest_police_facility  -14.284     12.631  -1.131    0.260    
## num_subway_stops           1.812      1.177   1.540    0.126    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 179.7 on 135 degrees of freedom
## Multiple R-squared:   0.34,  Adjusted R-squared:  0.3204 
## F-statistic: 17.39 on 4 and 135 DF,  p-value: 1.579e-11
## 
## 
## $vif
##      population_density    proportion_park_area closest_police_facility 
##                1.123477                1.073673                1.112365 
##        num_subway_stops 
##                1.032180 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.19558, observed rank = 2999, p-value = 0.0003333
## alternative hypothesis: greater
# equation 6
linear_results_list[[6]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##             (Intercept)     proportion_park_area  closest_police_facility  
##                 367.832                 -256.239                  -39.488  
##             num_cameras  
##                  -4.988  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -232.04 -127.14  -53.54   40.88 1040.75 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              367.832     42.919   8.570 2.01e-14 ***
## proportion_park_area    -256.239    215.370  -1.190  0.23621    
## closest_police_facility  -39.488     14.355  -2.751  0.00676 ** 
## num_cameras               -4.988     12.918  -0.386  0.70000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 212.4 on 136 degrees of freedom
## Multiple R-squared:  0.07146,    Adjusted R-squared:  0.05098 
## F-statistic: 3.489 on 3 and 136 DF,  p-value: 0.01757
## 
## 
## $vif
##    proportion_park_area closest_police_facility             num_cameras 
##                1.037584                1.028877                1.012933 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.33258, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 7
linear_results_list[[7]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##              (Intercept)                unemployed  commute_public_transport  
##                 26.83523                   0.01141                   0.03689  
##       population_density      proportion_park_area  
##                  5.39234                 -70.31357  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -430.52  -66.64  -22.01   25.82  814.00 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               26.835225  37.207572   0.721 0.472015    
## unemployed                 0.011410   0.005957   1.915 0.057544 .  
## commute_public_transport   0.036894   0.012524   2.946 0.003795 ** 
## population_density         5.392337   1.524416   3.537 0.000555 ***
## proportion_park_area     -70.313569 166.291306  -0.423 0.673089    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 163.9 on 135 degrees of freedom
## Multiple R-squared:  0.4508, Adjusted R-squared:  0.4346 
## F-statistic: 27.71 on 4 and 135 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##               unemployed commute_public_transport       population_density 
##                 1.874388                 2.793738                 1.761982 
##     proportion_park_area 
##                 1.038220 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.29882, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 8
linear_results_list[[8]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)               household_income  
##                     -19.82803                        0.02625  
## no_certificate_diploma_degree                    num_cameras  
##                       0.01469                       -8.85584  
##            population_density  
##                       3.64098  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -406.07  -52.73  -14.07   30.77  748.92 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -19.828030  27.215403  -0.729  0.46753    
## household_income                0.026250   0.003344   7.851 1.14e-12 ***
## no_certificate_diploma_degree   0.014688   0.006988   2.102  0.03741 *  
## num_cameras                    -8.855844   8.794282  -1.007  0.31574    
## population_density              3.640975   1.255088   2.901  0.00434 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 142 on 135 degrees of freedom
## Multiple R-squared:  0.5879, Adjusted R-squared:  0.5757 
## F-statistic: 48.16 on 4 and 135 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##              household_income no_certificate_diploma_degree 
##                      1.771998                      1.191055 
##                   num_cameras            population_density 
##                      1.050118                      1.591810 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.22648, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 9
linear_results_list[[9]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)             low_income_percent  
##                      45.55742                        2.31358  
##            population_density               num_subway_stops  
##                       8.94888                        2.12519  
## no_certificate_diploma_degree  
##                       0.02907  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -510.91  -68.76  -16.35   29.98  980.26 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   45.557423  40.309422   1.130 0.260399    
## low_income_percent             2.313579   1.981853   1.167 0.245114    
## population_density             8.948876   1.252485   7.145 5.09e-11 ***
## num_subway_stops               2.125189   1.098550   1.935 0.055138 .  
## no_certificate_diploma_degree  0.029068   0.008018   3.625 0.000408 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 169.6 on 135 degrees of freedom
## Multiple R-squared:  0.4118, Adjusted R-squared:  0.3944 
## F-statistic: 23.63 on 4 and 135 DF,  p-value: 7.975e-15
## 
## 
## $vif
##            low_income_percent            population_density 
##                      1.181203                      1.110509 
##              num_subway_stops no_certificate_diploma_degree 
##                      1.009740                      1.098619 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.19267, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 10
linear_results_list[[10]]
## $model
## 
## Call:
## lm(formula = equation, data = data)
## 
## Coefficients:
##                   (Intercept)           proportion_park_area  
##                      -9.02102                      -93.41696  
##                    unemployed             population_density  
##                      -0.02447                        3.32138  
## no_certificate_diploma_degree               household_income  
##                       0.03905                        0.03447  
## 
## 
## $summary
## 
## Call:
## lm(formula = equation, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -344.93  -53.85  -15.00   28.11  730.09 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -9.021021  30.412083  -0.297 0.767211    
## proportion_park_area          -93.416965 140.333806  -0.666 0.506762    
## unemployed                     -0.024474   0.008401  -2.913 0.004195 ** 
## population_density              3.321381   1.237831   2.683 0.008211 ** 
## no_certificate_diploma_degree   0.039047   0.011129   3.509 0.000614 ***
## household_income                0.034469   0.004207   8.194 1.78e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 138.4 on 134 degrees of freedom
## Multiple R-squared:  0.6113, Adjusted R-squared:  0.5968 
## F-statistic: 42.15 on 5 and 134 DF,  p-value: < 2.2e-16
## 
## 
## $vif
##          proportion_park_area                    unemployed 
##                      1.036856                      5.228300 
##            population_density no_certificate_diploma_degree 
##                      1.629149                      3.178776 
##              household_income 
##                      2.951207 
## 
## $moran
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  linearMod$residuals 
## weights: crimedata_nbq_w  
## number of simulations + 1: 3000 
## 
## statistic = 0.18295, observed rank = 2997, p-value = 0.001
## alternative hypothesis: greater
census2016_sf$residuals1 <- residuals(linear_results_list[[1]]$model)
census2016_sf$residuals2 <- residuals(linear_results_list[[2]]$model)
census2016_sf$residuals3 <- residuals(linear_results_list[[3]]$model)
census2016_sf$residuals4 <- residuals(linear_results_list[[4]]$model)
census2016_sf$residuals5 <- residuals(linear_results_list[[5]]$model)

qtm(census2016_sf, "residuals1")
## Variable(s) "residuals1" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals2")
## Variable(s) "residuals2" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals3")
## Variable(s) "residuals3" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals4")
## Variable(s) "residuals4" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals5")
## Variable(s) "residuals5" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

census2016_sf$residuals6 <- residuals(linear_results_list[[6]]$model)
census2016_sf$residuals7 <- residuals(linear_results_list[[7]]$model)
census2016_sf$residuals8 <- residuals(linear_results_list[[8]]$model)
census2016_sf$residuals9 <- residuals(linear_results_list[[9]]$model)
census2016_sf$residuals10 <- residuals(linear_results_list[[10]]$model)

qtm(census2016_sf, "residuals6")
## Variable(s) "residuals6" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals7")
## Variable(s) "residuals7" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals8")
## Variable(s) "residuals8" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals9")
## Variable(s) "residuals9" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

qtm(census2016_sf, "residuals10")
## Variable(s) "residuals10" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

LISA Plots

visualise_LISA(census2016_sf, lisa_results_list[[1]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[2]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[3]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[4]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[5]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[6]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[7]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[8]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[9]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualise_LISA(census2016_sf, lisa_results_list[[10]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break

visualize_LISA_clusters(census2016_sf, lisa_results_list[[1]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[2]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[3]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[4]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[5]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[6]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[7]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[8]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[9]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

visualize_LISA_clusters(census2016_sf, lisa_results_list[[10]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))

GWR Plots

# Plot first 5 GWR results for each equation
for (i in seq_along(gwr_results_list[1:5])) {
    gwr_result <- gwr_results_list[[i]]
    equation <- equations[[i]]
    
    # Extract GWR spatial data frame
    agwr_sf <- st_as_sf(gwr_result$a.gwr$SDF)
    
    # Visualize GWR result
    gwr_plot <- visualise_gwr(agwr_sf, equation)
    
    # Print GWR plot
    print(gwr_plot)
}
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "household_income" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "single_parents" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_subway_stops" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

for (i in seq_along(gwr_results_list)[6:10]) {
    gwr_result <- gwr_results_list[[i]]
    equation <- equations[[i]]
    
    # Extract GWR spatial data frame
    agwr_sf <- st_as_sf(gwr_result$a.gwr$SDF)
    
    # Visualize GWR result
    gwr_plot <- visualise_gwr(agwr_sf, equation)
    
    # Print GWR plot
    print(gwr_plot)
}
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "low_income_percent" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.